*Do-file for "Breaking the Links: Natural Resource Booms and Intergenerational Mobility"

* Session setup: close a log if one is open, empty memory, turn off
* output paging and make the data folder the working directory.
capture log close
clear all
set more off
cd "/data/"


***************************************************************************************
****Create dataset with all cohorts
***************************************************************************************

*1- info on Earnings, Parents, Year of Birth, Gender
* Load only the needed columns from the master file, skipping rows with npid==0,
* and remove exact duplicate rows.
use npid faar fmnd kjonn nfpid nmpid if npid != 0 using "/befmaster.dta", clear
duplicates drop
rename (faar kjonn) (foedselsaar sex)
* Attach the earnings file; persons without an earnings record stay in the data.
merge 1:1 npid using "/pearn_person1967-2010.dta", keep(master match) nogenerate
save "/base_data.dta", replace

*2- Add info on Avg Earnings
* Six-year average of annual earnings over ages 36-41 (avg_pearn3641) and
* ages 50-55 (avg_pearn5055). For a person who reaches the starting age in
* calendar year y, the average uses pearn_y ... pearn_(y+5).
* Start years run 1967-2005 so that y+5 never exceeds 2010
* (assumes yearly variables pearn_1967 ... pearn_2010 - TODO confirm).
use "/base_data.dta", clear
foreach age in 36 50 {
	local last = `age' + 5
	gen avg_pearn`age'`last' = .
	forvalues y = 1967/2005 {
		local y1 = `y' + 1
		local y2 = `y' + 2
		local y3 = `y' + 3
		local y4 = `y' + 4
		local y5 = `y' + 5
		* a missing year of earnings makes the whole average missing
		replace avg_pearn`age'`last' = (pearn_`y' + pearn_`y1' + pearn_`y2' + pearn_`y3' + pearn_`y4' + pearn_`y5')/6 if foedselsaar + `age' == `y'
	}
}
* persist the new variables: the following steps re-read base_data from disk
save "/base_data.dta", replace

*3- info on Education
* The clear option is needed: without it this command stops with an error
* whenever the data in memory has unsaved changes.
use "/edu_level.dta", clear
keep npid edudt nus2000
* calendar year of the education date
gen avgyr = year(edudt)
drop edudt
* keep records dated 2010 or earlier (a missing year is dropped as well)
keep if avgyr <= 2010
* base_data is the using file, so every base_data person is retained
merge 1:1 npid using "/base_data.dta", keep(using matched) nogen
save "/base_data.dta", replace

*4- info on Place of Birth
use npid fodes using "/mobilitet_person1990-2010.dta", clear
* base_data is the using file here. keep(using matched) retains every
* base_data person (birthplace missing when absent from the mobility file)
* and discards mobility-only rows that carry no cohort information,
* matching the way the education step merges.
* NOTE(review): the previous keep(master matched) did the opposite - confirm
* that no sample restriction to the mobility file was intended.
merge 1:1 npid using "/base_data.dta", keep(using matched) nogen
rename fodes birthplace
save "/base_data.dta", replace

*5- info on Residence by Age 7
* year7 = calendar year in which the person turns 7; used as merge key
use "/base_data.dta", clear
gen year7 = foedselsaar + 7
save "/base_data.dta", replace
use "/bustadkommune1967-2011.dta", clear
rename year year7
* The residence file is keyed on npid and year. Keeping unmatched master rows
* would flood base_data with person-years unrelated to age 7 and would drop
* everybody without a record at age 7. keep(using matched) keeps exactly one
* row per base_data person, with place7 missing when no record exists.
merge 1:1 npid year7 using "/base_data.dta", keep(using matched) nogen
rename bostedskommune place7
save "/base_data.dta", replace


***************************************************************************************
****Create dataset with cohorts born in 1952-1957 and their fathers
***************************************************************************************

* Children sample: persons born 1952-1957
use if inrange(foedselsaar, 1952, 1957) using "/base_data.dta", clear
save "/multigen_data5257.dta", replace

*1- Add info on Fathers
* Turn the population file into a father lookup: every variable gets the
* suffix _f and the person id becomes the father id used as merge key.
use "/base_data.dta", clear
drop year7 nfpid nmpid place7
rename * *_f
rename npid_f nfpid
* all children are kept, whether or not their father is found
merge 1:m nfpid using "/multigen_data5257.dta", keep(match using) nogenerate
save "/multigen_data5257.dta", replace

*2- Assign each Municipality of Birth to a Local Labor Market
use "/multigen_data5257.dta", clear
destring birthplace, replace
* the link file is keyed on kommune, so rename temporarily for the merge
rename birthplace kommune
merge m:1 kommune using "/link_komLLM46.dta", keep(master matched) nogen
rename arbeidsmarked llm
rename kommune birthplace

*3- Assign Municipality of Birth of the Father to a LLM
* Keep working on the data in memory: reloading the file at this point would
* throw away the llm variable created above, which later steps rely on.
destring birthplace_f, replace
rename birthplace_f kommune
merge m:1 kommune using "/link_komLLM46.dta", keep(master matched) nogen
rename arbeidsmarked broadllm_f
rename kommune birthplace_f
save "/multigen_data5257.dta", replace

*4- Add info on Sons' Earnings Ranks
*Age 36-41 (restrict among those with info on parents and born in Norway)
use "/base_data.dta", clear
* birthplace is only destringed later, in the cohort files, so it may still be
* a string here; convert on the fly so that the division does not fail
capture confirm string variable birthplace
if _rc == 0 {
	gen foreign = int(real(birthplace)/1000)
}
else {
	gen foreign = int(birthplace/1000)
}
* earnings enter the ranking only when the birthplace code is not in the 3xxx
* range and at least one parent id is present
gen avg_pearn3641_cut = avg_pearn3641 if foreign != 3 & (nmpid != . | nfpid != .)
gen rank3641mstrict = .
* percentile rank (0-99, from icodes) among men born in the same year
forvalues i = 1931/1969 {
	egen rank`i' = cut(avg_pearn3641_cut) if foedselsaar == `i' & sex == 1, group(100) icodes
	replace rank3641mstrict = rank`i' if foedselsaar == `i'
	drop rank`i'
}
drop foreign avg_pearn3641_cut
save "/base_data.dta", replace
*Add to the multigen_data5257
* only rank3641mstrict is created above, so it is the only rank loaded;
* nogen keeps a stray _merge variable out of the saved file
use npid rank3641mstrict using "/base_data.dta", clear
merge 1:m npid using "/multigen_data5257", keep(using matched) nogen
save "/multigen_data5257.dta", replace

*5-Add info on Fathers' Rank (among other fathers with sons in the same cohort)
use "/multigen_data5257.dta", clear
gen rank5055c_f = .
* one pass per child birth cohort: cut fathers' average earnings into
* 100 groups (codes 0-99) and copy the code into rank5055c_f
forvalues cohort = 1952/1957 {
	tempvar pct
	egen `pct' = cut(avg_pearn5055_f) if foedselsaar == `cohort', group(100) icodes
	replace rank5055c_f = `pct' if foedselsaar == `cohort'
	drop `pct'
}
save "/multigen_data5257.dta", replace

*6- Generate Quintiles in each cohort for fathers and kids
* Both ranks come from egen cut(...) with icodes and therefore run 0-99.
* Quintile k must cover ranks 20(k-1) to 20k-1; the earlier cut points
* (<=20, <=40, ...) put 21 percentiles in quintile 1 and 19 in quintile 5.
use "/multigen_data5257.dta", clear
gen quintile = floor(rank3641mstrict/20) + 1 if inrange(rank3641mstrict, 0, 99)
gen quintile_f = floor(rank5055c_f/20) + 1 if inrange(rank5055c_f, 0, 99)
save "/multigen_data5257.dta", replace

*7 - Assign Oil Regions
*First generate a dataset with oil proportions in each LLM in 1980 (from 1980 census)
use npid kom80 isic80 isic5 if npid != 0 & isic80 != . using "/fob80.dta", clear
rename kom80 kommune
merge m:1 kommune using "/link_komLLM46.dta", keep(master matched) nogen //assign each municipality to a llm
rename arbeidsmarked broadllm
* workers whose municipality has no LLM would otherwise form a group with a
* missing key and be matched to every person with a missing llm
drop if missing(broadllm)
gen isic3 = int(isic5/100)
*petroleum sector isic 3-digit definition
gen oil_sec_broad = inlist(isic3, 220, 353, 354, 382, 384, 502)
collapse (count) population = npid (sum) oil_sec_broad, by(broadllm)
rename broadllm llm
gen prop = oil_sec_broad/population
label var prop "proportion of workers employed in oil related industry"
sum prop, d
*generate an indicator for the proportion of workers employed in the oil sector
* a share of exactly 10% belongs to the middle group, as the label states
gen oil = 0
replace oil = 1 if prop > 0.075 & prop <= 0.1
replace oil = 2 if prop > 0.1 & !missing(prop)
label var oil "2 if prop_oil>10%, 1 if >7.5%, 0 otherwise"
keep llm oil
* same location as every other file; the 1932-33 section reads it from there
save "/oil_proportion1980.dta", replace
*merge with the main dataset
* keep(master matched): keep(master) alone retains only the UNMATCHED rows,
* i.e. it would drop everybody whose LLM has an oil classification
use "/multigen_data5257.dta", clear
merge m:1 llm using "/oil_proportion1980.dta", keep(master matched) nogen
save "/multigen_data5257.dta", replace

*8- Generate Variables for Education
* nus: leading digit of the six-digit nus2000 code
* (presumably the education level - confirm against the codebook)
gen nus = int(nus2000/100000)
* college: 1 when nus is 6 or above, 0 below, missing when nus is missing
gen college = (nus >= 6) if nus != .
* academic: 1 for the listed code ranges within 3xxxxx and 4xxxxx and for
* every code from 500000 upwards, 0 otherwise, missing when nus2000 is missing
gen academic = ( ///
	(nus2000 >= 300000 & nus2000 < 314000) | ///
	(nus2000 >= 331000 & nus2000 < 340000) | ///
	(nus2000 >= 351000 & nus2000 < 355000) | ///
	(nus2000 >= 400000 & nus2000 < 414000) | ///
	(nus2000 >= 431000 & nus2000 < 440000) | ///
	(nus2000 >= 451000 & nus2000 < 455000) | ///
	(nus2000 >= 500000) ) if nus2000 != .
save "/multigen_data5257.dta", replace

*9- Add info on IQ
* Read the three needed columns of the sesjon file and attach them to the
* 1952-57 sample; persons without a sesjon record are kept with missing values.
use npid born ability using "/sesjon.dta", clear
merge 1:1 npid using "/multigen_data5257.dta", keep(match using) nogenerate
rename ability iq
save "/multigen_data5257.dta", replace


***************************************************************************************
****Create dataset with cohorts born in 1932-1933 and info on their fathers
***************************************************************************************

* foedselsaar is loaded too: it selects the cohorts here and is used below and
* in the regressions (the variable byear is never read from the file)
use npid foedselsaar finc fbyear iq if inrange(foedselsaar, 1932, 1933) using "/cohorts3233.dta", clear
merge 1:1 npid using "/base_data.dta", keepusing(rank3641mstrict avg_pearn3641 birthplace) keep(master matched) nogen

*1-Rank for Fathers
* percentile (0-99) of father income among fathers of sons born the same year
gen frank = .
forvalues y = 1932/1933 {
	egen frank`y' = cut(finc) if foedselsaar == `y', group(100) icodes
	replace frank = frank`y' if foedselsaar == `y'
	drop frank`y'
}

*2- Generate Quintiles of Earnings Distribution for Fathers and Children
* ranks run 0-99, so quintile k covers ranks 20(k-1) to 20k-1
gen quintile_f = floor(frank/20) + 1 if inrange(frank, 0, 99)
gen quintile = floor(rank3641mstrict/20) + 1 if inrange(rank3641mstrict, 0, 99)

*3- Assign oil regions
destring birthplace, replace
rename birthplace kommune
merge m:1 kommune using "/link_komLLM46.dta", keep(master matched) nogen
rename arbeidsmarked llm
rename kommune birthplace
* keep(master matched): keep(master) alone would retain only persons whose LLM
* is NOT found in the oil file and leave oil missing for all of them
merge m:1 llm using "/oil_proportion1980", keep(master matched) nogen
* replace lets the do-file be re-run
save "/dataset3233.dta", replace

*4- Assign Academic High School
**Add info academic
use npid avgdatodt nus2000 using "/finished_edu.dta", clear
gen year_edu = year(avgdatodt)
gen nus = int(nus2000/100000)
* keep, for each person, the record(s) at the highest level attained
bysort npid: egen ma = max(nus)
drop if nus != ma
gen academic = ( ///
	(nus2000 >= 300000 & nus2000 < 314000) | ///
	(nus2000 >= 331000 & nus2000 < 340000) | ///
	(nus2000 >= 351000 & nus2000 < 355000) | ///
	(nus2000 >= 400000 & nus2000 < 414000) | ///
	(nus2000 >= 431000 & nus2000 < 440000) | ///
	(nus2000 >= 451000 & nus2000 < 455000) | ///
	(nus2000 >= 500000) ) if nus2000 != .
* Several records can share the highest level. Keep those carrying the
* person's maximum academic flag (comparing the 0/1 flag with the 0-9 level,
* as before, dropped almost everyone), then keep a single deterministic record
* so that npid is unique for the 1:1 merge.
* NOTE(review): confirm that "academic if any top-level record is academic"
* is the intended rule.
bysort npid: egen any_academic = max(academic)
drop if academic != any_academic
bysort npid (year_edu nus2000): keep if _n == 1
drop ma any_academic
merge 1:1 npid using "/dataset3233.dta", keep(using matched) nogen
* replace is required: the file already exists at this point
save "/dataset3233.dta", replace


***************************************************************************************
****Create dataset with children of men born in 1952-1957
***************************************************************************************

* Men of the 1952-57 cohorts become the father generation of this file
use if sex == 1 using "/multigen_data5257.dta", clear
* Shift every suffix up one generation: own variables get _f, and what used
* to be the father's variables (now ending in _f_f) get _gf
rename * *_f
rename *_f_f *_gf
* ids: own id becomes the children's merge key, the father's id the grandfather id
rename (npid_f nfpid_f) (nfpid npid_gf)
* attach the children; men with no child in base_data are kept as well
merge 1:m nfpid using "/base_data.dta", keep(master match) nogenerate
save "/multigen_data_3g.dta", replace

*1- Add info on Academic Education for the Different Generations
* The same rule is applied to the child (nus2000), the father (nus2000_f) and
* the grandfather (nus2000_gf); each result goes to the variable with the
* matching suffix. Previously the grandfather's codes were written into
* academic_f, leaving academic_gf at 0/missing and corrupting academic_f.
foreach v in nus2000 nus2000_f nus2000_gf {
	* suffix is "", "_f" or "_gf"
	local sfx : subinstr local v "nus2000" ""
	* academic_f can already exist (carried over from the 1952-57 file)
	capture drop academic`sfx'
	gen academic`sfx' = ( ///
		(`v' >= 300000 & `v' < 314000) | ///
		(`v' >= 331000 & `v' < 340000) | ///
		(`v' >= 351000 & `v' < 355000) | ///
		(`v' >= 400000 & `v' < 414000) | ///
		(`v' >= 431000 & `v' < 440000) | ///
		(`v' >= 451000 & `v' < 455000) | ///
		(`v' >= 500000) ) if `v' != .
}
save "/multigen_data_3g.dta", replace

*2- Add info on earnings at age 30 of the third generation
use "/multigen_data_3g.dta", clear
* calendar year in which the child turns 30; merge key into the income file
gen year = foedselsaar + 30
* m:1 rather than 1:1: fathers without a matched child are kept in this file
* with missing npid and year, so npid-year is not unique in the master data
merge m:1 npid year using "/dispinnt_1967-2014.dta", keep(master matched) nogen
label var pearn "Earnings at age 30"
drop year dispinnt btoinnt ntoinnt
rename pearn pearn30
save "/multigen_data_3g.dta", replace


***************************************************************************************
****Table 1 - Rank-Rank Regression
***************************************************************************************
*1- Cohorts 32-33
use "/dataset3233.dta", clear
*Exclude the regions which are "middle oil", gen dummies for high-oil
drop if oil == 1
gen high = oil == 2
*Gen age of Father at the Birth of the Son
rename fbyear foedselsaar_f
gen age_f = foedselsaar - foedselsaar_f
gen age_f2 = age_f^2
* the father rank in this dataset is frank (rank5055c_f exists only in the
* 1952-57 file), so the interaction must be built from frank
gen highoil = frank*high
reg rank3641mstrict frank highoil i.foedselsaar age_f age_f2 i.llm, vce(cluster birthplace)

*2- Cohorts 1952-1957 (1 for men, 2 for women)
foreach g in 1 2 {
	* reload for each sex: the first pass leaves only men in memory
	use "/multigen_data5257.dta", clear
	keep if sex == `g'
	*Data: exclude the regions which are "middle oil", gen dummies for high-oil
	drop if oil == 1
	gen high = oil == 2
	*gen age of father at the birth of the son
	gen age_f = foedselsaar - foedselsaar_f
	gen age_f2 = age_f^2
	gen highoil = rank5055c_f*high
	* NOTE(review): rank3641mstrict is filled only for sex==1 when it is built,
	* so this regression has no usable outcome for g==2 - confirm the intended
	* outcome for women. capture noisily lets the loop continue.
	capture noisily reg rank3641mstrict rank5055c_f highoil i.foedselsaar age_f age_f2 i.llm, vce(cluster birthplace)

	*Adjusting earnings distribution to be the same in high and low oil regions
	use "/multigen_data5257.dta", clear
	keep if sex == `g'
	*rank children within each oil region
	gen rank = .
	forvalues i = 0/2 {
		forvalues y = 1952/1957 {
			display "oil group `i', cohort `y'"
			* full variable name: avg_pearn alone is an ambiguous abbreviation
			egen rank`y' = cut(avg_pearn3641) if foedselsaar == `y' & oil == `i', group(100) icodes
			replace rank = rank`y' if foedselsaar == `y' & oil == `i'
			drop rank`y'
		}
	}
	rename avg_pearn3641 pearn
	* a leftover _merge would clash with the merge below
	capture drop _merge
	tempfile temp
	save `temp'
	*compare the average level of income within each percentile of rank between high and low oil
	* reshape cannot handle a missing oil or rank value
	drop if missing(oil, rank)
	collapse pearn, by(oil rank)
	reshape wide pearn, j(oil) i(rank)
	gen diff0 = pearn2 - pearn0
	gen diff1 = pearn2 - pearn1
	keep rank diff0 diff1
	*add the difference to each rank for the low oil region
	merge 1:m rank using `temp'
	drop if _merge == 1
	drop _merge
	gen earn_adj = pearn
	replace earn_adj = pearn + diff0 if oil == 0
	replace earn_adj = pearn + diff1 if oil == 1
	*rank all individuals at the national level
	drop rank
	gen rank = .
	forvalues i = 1952/1957 {
		egen rank`i' = cut(earn_adj) if foedselsaar == `i', group(100) icodes
		replace rank = rank`i' if foedselsaar == `i'
		drop rank`i'
	}
	*analysis
	drop if oil == 1
	gen high = oil == 2
	gen age_f = foedselsaar - foedselsaar_f
	gen age_f2 = age_f^2
	gen highoil = rank5055c_f*high
	reg rank rank5055c_f highoil i.foedselsaar age_f age_f2 i.llm, vce(cluster birthplace)
}
*


***************************************************************************************
****Tables 2, 3 and 4 - Quintile Matrices
***************************************************************************************
* Helper shared by both cohort groups.
* For the data in memory (needs oil, quintile and quintile_f) it writes, to the
* workbook selected with putexcel set, one three-row band per father quintile:
*   row 1: share (in %) of sons in each quintile in the high==0 group
*   row 2: difference in that share, high==1 minus high==0
*   row 3: p-value of the t-test of the difference, in parentheses
* Columns B-F are son quintiles 1-5; column A holds the row labels.
* The helper drops the middle-oil regions from the data in memory.
capture program drop qq_matrix
program define qq_matrix
	drop if oil == 1
	gen byte high = oil == 2
	forvalues i = 1/5 {
		local a = (`i'*4) - 3
		local b = `a' + 1
		local c = `a' + 2
		putexcel A`a' = "Fathers in quintile `i'"
		putexcel A`b' = "Difference high-low"
		putexcel A`c' = "p-value for a ttest of the difference"
		forvalues l = 1/5 {
			* 66 is the character code of B, so l = 1..5 maps to columns B..F
			local letter = char(65 + `l')
			display "father `i', son `l'"
			tempvar q
			gen byte `q' = quintile == `l'
			ttest `q' if quintile_f == `i', by(high)
			local mu1 = round(r(mu_1)*100, 0.1)
			local diff = round((r(mu_2) - r(mu_1))*100, 0.1)
			* fixed three decimals, e.g. 0.034 or 0.000
			local p = string(r(p), "%5.3f")
			putexcel `letter'`a' = `mu1'
			putexcel `letter'`b' = `diff'
			putexcel `letter'`c' = "(`p')"
			drop `q'
		}
	}
end

*1- Cohorts 1952-1957
foreach g in 1 2 {
	* reload for each sex: after the first pass only men would be in memory
	use "/multigen_data5257.dta", clear
	keep if sex == `g'
	putexcel set "/qq_matrices_`g'.xls", replace
	putexcel describe
	qq_matrix
}


*2- Cohorts 1932-1933
* same location as where the file is saved
use "/dataset3233.dta", clear
putexcel set "/qq_matrices3233.xls", replace
qq_matrix

	
	
***************************************************************************************
****Table 5 - Returns to Education and Cognitive Ability
***************************************************************************************	
*1- Return to Academic HS, Cohorts 1932-1933
use "/dataset3233.dta", clear
* leave out the middle-oil regions and education level 9
drop if oil == 1 | nus == 9
gen high = (oil == 2)
gen learn = ln(avg_pearn3641)
gen highacademic = academic*high
* log earnings on the academic flag and its high-oil interaction, with
* birth-year and LLM dummies, for sons whose father has a rank
reg learn academic highacademic i.foedselsaar i.llm if frank != ., vce(cluster birthplace)

*2- Returns to IQ, Cohorts 1932-1933
use "/dataset3233.dta", clear
* the file was just reloaded, so the sample restriction, the high-oil dummy
* and log earnings have to be rebuilt before they can be used
drop if oil == 1
gen high = oil == 2
gen learn = log(avg_pearn3641)
* iq_corr recodes the distinct iq values to consecutive integers 1, 2, ...
egen iq_corr = group(iq)
* refer to iq_corr by its full name rather than through the abbreviation iq
gen hiq = iq_corr > 6 if iq_corr != .
gen hiq_high = hiq*high
reg learn hiq hiq_high i.foedselsaar i.llm if frank != ., vce(cluster birthplace)

*3- Return to Academic HS and College, Cohorts 1952-1957
foreach g in 1 2 {
	use "/multigen_data5257", clear
	keep if sex == `g'
	drop if oil == 1
	gen high = oil == 2
	gen learn = log(avg_pearn3641)
	drop if nus == 9
	gen highacademic = high*academic
	* the father rank built for this file is rank5055c_f and the college dummy
	* is college; rank5055m_f and college36 are never created
	reg learn academic highacademic i.foedselsaar i.llm if rank5055c_f != . & nus != ., vce(cluster birthplace)
	gen highcollege = high*college
	reg learn college highcollege i.foedselsaar i.llm if rank5055c_f != . & nus != ., vce(cluster birthplace)
}

*4- Returns to IQ, Cohorts 1952-1957 [only men]
use "/multigen_data5257", clear
keep if sex == 1
* rebuilt after the reload, in the same way as for the education regressions
drop if oil == 1
gen high = oil == 2
gen learn = log(avg_pearn3641)
gen hiq = iq > 6 if iq != .
gen hiq_high = hiq*high
* rank5055c_f is the father rank that exists in this file
reg learn hiq hiq_high i.foedselsaar i.llm if rank5055c_f != ., vce(cluster birthplace)

*5- Returns to Academic by Age 30, 3rd Generation
foreach g in 1 2 {
	use "/multigen_data_3g.dta", clear
	keep if sex == `g'
	drop if llm_f == .
	drop if rank5055c_gf == .
	drop if rank3641mstrict_f == .
	drop if academic == .
	* the oil classification in this file is the father's, stored as oil_f
	drop if oil_f == .
	drop if pearn30 == . //drop those for whom pearn is not present
	drop if nus2000 >= 900000
	gen high = oil_f == 2
	drop if oil_f == 1
	gen highacademic = high*academic
	* earnings at age 30 are stored in pearn30; avg_pearn3040 is never created
	gen learn = log(pearn30)
	reg learn academic highacademic i.foedselsaar i.llm_f, vce(cluster birthplace)
}

	
***************************************************************************************
****Table 6 - Rank-Rank Regression across 3 Generations
***************************************************************************************	
foreach g in 1 2 {
	use "/multigen_data_3g.dta", clear
	keep if sex == `g'
	* complete cases only; the last condition drops those for whom pearn is not present
	* NOTE(review): oil is not a variable of this file by that exact name;
	* it presumably resolves to oil_f through variable abbreviation - confirm
	drop if rank5055c_gf == . | rank3641mstrict_f == . | academic == . | oil == . | pearn30 == .
	* percentile of age-30 earnings, 1-100, among children whose fathers were
	* born in the same year
	gen rank100 = .
	forvalues cohort = 1952/1957 {
		tempvar pct
		egen `pct' = cut(pearn30) if foedselsaar_f == `cohort', group(100) icodes
		replace rank100 = `pct' + 1 if foedselsaar_f == `cohort'
		drop `pct'
	}
	* leave out the middle-oil regions, flag the high-oil ones
	drop if oil_f == 1
	gen high = (oil_f == 2)
	* age of father at the birth of the child, and its square
	gen age_f = foedselsaar - foedselsaar_f
	gen age_f2 = age_f^2
	* interactions of the grandfather and father ranks with high-oil
	gen highoil_gf = rank5055c_gf*high
	reg rank100 rank5055c_gf highoil_gf i.foedselsaar_f age_f age_f2 i.llm_f, vce(cluster birthplace_f)
	gen highoil_f = rank3641mstrict_f*high
	reg rank100 rank3641mstrict_f highoil_f i.foedselsaar_f age_f age_f2 i.llm_f, vce(cluster birthplace_f)
	reg rank100 rank5055c_gf highoil_gf rank3641mstrict_f highoil_f i.foedselsaar_f age_f age_f2 i.llm_f, vce(cluster birthplace_f)
}


***************************************************************************************
****Figure 1 - Association between 3rd Generation Rank and the Ranks of Other Generations
***************************************************************************************	
*Build data (figures code in R)
* For each sex of the third generation: cut the grandfather rank, the father
* rank and the child's age-30 earnings into 20 groups (1-20) among families
* whose father was born in the same year, then export the mean child group
* by grandfather group, father group and oil classification.
foreach g in 1 2 {
	use "/multigen_data_3g.dta", clear
	keep if sex == `g'
	* source variable k is cut into target variable k
	local sources rank5055c_gf rank3641mstrict_f pearn30
	local targets gfrank20 frank20 rank20
	forvalues k = 1/3 {
		local src : word `k' of `sources'
		local dst : word `k' of `targets'
		gen `dst' = .
		forvalues i = 1952/1957 {
			egen rank`i' = cut(`src') if foedselsaar_f == `i', group(20) icodes
			* icodes start at 0; shift to 1-20
			replace `dst' = rank`i' + 1 if foedselsaar_f == `i'
			drop rank`i'
		}
	}
	* the oil classification in this file is the father's, stored as oil_f
	collapse rank20, by(gfrank20 frank20 oil_f)
	* replace lets the do-file be re-run without stopping on an existing file
	outsheet using "/3gearn_`g'.csv", comma replace
}

